import pandas as pd
import pymysql
import random
import numpy as np

def get_down_rating():
    """Simulate one low-probability (show, rating) observation.

    Returns:
        A ``(show, rating)`` pair.

        ``show`` is drawn from {0, 1} and equals 1 with probability 0.3.

        ``rating`` is the running sum over a random number (1-8) of
        independent trials: each trial adds 0.95 on a hit and subtracts 0.05
        on a miss. The per-trial hit probability is drawn once per call from
        {0.010, 0.011, ..., 0.015}.
    """
    show_prob = 0.3
    show = np.random.choice([0, 1], p=[1.0 - show_prob, show_prob])

    hit_prob = random.randint(10, 15) / 1000.0
    trial_count = random.randint(1, 8)
    # The hit probability is fixed for the whole call, so the weights are
    # built once rather than on every trial.
    trial_weights = [1.0 - hit_prob, hit_prob]

    rating = 0
    for _ in range(trial_count):
        hit = np.random.choice([0, 1], p=trial_weights)
        rating += 0.95 if hit == 1 else -0.05

    return show, rating

def get_top_rating():
    """Simulate one high-probability (show, rating) observation.

    Returns:
        A ``(show, rating)`` pair.

        ``show`` is drawn from {0, 1} and equals 1 with probability 0.6.

        ``rating`` is the running sum over a random number (1-8) of
        independent trials: each trial adds 0.95 on a hit and subtracts 0.05
        on a miss. The per-trial hit probability is drawn once per call from
        {0.070, 0.071, ..., 0.100}.
    """
    show_prob = 0.6
    show = np.random.choice([0, 1], p=[1.0 - show_prob, show_prob])

    hit_prob = random.randint(70, 100) / 1000.0
    trial_count = random.randint(1, 8)
    # The hit probability is fixed for the whole call, so the weights are
    # built once rather than on every trial.
    trial_weights = [1.0 - hit_prob, hit_prob]

    rating = 0
    for _ in range(trial_count):
        hit = np.random.choice([0, 1], p=trial_weights)
        rating += 0.95 if hit == 1 else -0.05

    return show, rating

# Parallel output columns (plain lists despite the ``_df`` suffix).
# generate_user_action appends to all three together, so index i across the
# lists describes one (user, ad_id, rating) row.
users_df, ad_id_df, rating_df = [], [], []

def generate_user_action(ad1s,users,count):
    """Append simulated (user, ad, rating) rows for one group of ads.

    ``count`` users are sampled once and treated as the targeted group for
    every ad in ``ad1s``: their observations come from ``get_top_rating``.
    All remaining users are visited in a fresh random order per ad and get
    observations from ``get_down_rating``. An observation is recorded only
    when its ``show`` flag is 1.

    Args:
        ad1s: Iterable of ad ids to generate rows for.
        users: Sequence of user ids (must be hashable).
        count: Number of users to place in the targeted group.

    Returns:
        None. Rows are appended to the module-level lists ``users_df``,
        ``ad_id_df`` and ``rating_df``.

    Raises:
        ValueError: If ``count`` exceeds ``len(users)`` (from
            ``random.sample``).
    """
    # Randomly pick `count` users as the targeted group for these ads.
    user_ad1s = random.sample(users,count)

    # Multiset of targeted users. Filtering against it replaces the former
    # per-user list.remove(), which rescanned the whole user list for every
    # targeted user (O(len(users) * count) per ad).
    targeted_counts = {}
    for user in user_ad1s:
        targeted_counts[user] = targeted_counts.get(user, 0) + 1

    for ad1 in ad1s:
        # Fresh random visiting order for the non-targeted users of this ad.
        shuffled_users = random.sample(users,len(users))

        for user in user_ad1s:
            show,rating = get_top_rating()
            if show == 1:
                users_df.append(user)
                ad_id_df.append(ad1)
                rating_df.append(rating)

        # Skip the first occurrence(s) of each targeted user, exactly what
        # repeated list.remove() did, and rate everyone else.
        pending = dict(targeted_counts)
        for user in shuffled_users:
            if pending.get(user, 0) > 0:
                pending[user] -= 1
                continue
            show,rating = get_down_rating()
            if show == 1:
                users_df.append(user)
                ad_id_df.append(ad1)
                rating_df.append(rating)

if __name__=="__main__":

    # Ad ids per group. Each group is a contiguous id range, so it is built
    # from range() rather than a hand-typed literal; this also corrects the
    # stray id 1399 that appeared in place of 139 in the first group.
    ad1s = list(range(101,148))  # 101..147
    ad2s = list(range(201,227))  # 201..226
    ad3s = list(range(301,336))  # 301..335
    ad4s = list(range(401,423))  # 401..422
    ad5s = list(range(501,533))  # 501..532
    ad6s = list(range(601,620))  # 601..619

    ads = ad1s+ad2s+ad3s+ad4s+ad5s+ad6s

    # Total number of ads across all groups.
    print(len(ads))

    users = range(100,120100)

    # (ad group, number of targeted users) pairs, processed in this order so
    # the generated row order matches the group order.
    group_plan = (
        (ad1s,40200),
        (ad2s,30820),
        (ad3s,41200),
        (ad4s,25200),
        (ad5s,41750),
        (ad6s,30750),
    )
    for ad_group,targeted_count in group_plan:
        generate_user_action(ad_group,users,targeted_count)

    # Assemble the accumulated columns and write them out without the index.
    test_dict_df = pd.DataFrame({'user':users_df,'ad_id':ad_id_df,'rating':rating_df})
    test_dict_df.to_csv("user_ad_rating.csv",index=False)






